#importing all the needed libraries.
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
# Use plotly.offline so figures render correctly inline in the HTML page.
plotly.offline.init_notebook_mode()

# Load the diabetes regression dataset from scikit-learn.
# Call load_diabetes() once and reuse the Bunch, instead of loading the
# dataset a second time just to read the feature names (as the original did).
from sklearn import datasets
diabetes = datasets.load_diabetes()
diabetes_X, diabetes_y = diabetes.data, diabetes.target

# Build a DataFrame so all feature columns can be inspected next to the target.
df = pd.DataFrame(data=diabetes_X, columns=diabetes.feature_names)
df['Target'] = diabetes_y
df.head(5)
| age | sex | bmi | bp | s1 | s2 | s3 | s4 | s5 | s6 | Target | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.038076 | 0.050680 | 0.061696 | 0.021872 | -0.044223 | -0.034821 | -0.043401 | -0.002592 | 0.019907 | -0.017646 | 151.0 |
| 1 | -0.001882 | -0.044642 | -0.051474 | -0.026328 | -0.008449 | -0.019163 | 0.074412 | -0.039493 | -0.068332 | -0.092204 | 75.0 |
| 2 | 0.085299 | 0.050680 | 0.044451 | -0.005670 | -0.045599 | -0.034194 | -0.032356 | -0.002592 | 0.002861 | -0.025930 | 141.0 |
| 3 | -0.089063 | -0.044642 | -0.011595 | -0.036656 | 0.012191 | 0.024991 | -0.036038 | 0.034309 | 0.022688 | -0.009362 | 206.0 |
| 4 | 0.005383 | -0.044642 | -0.036385 | 0.021872 | 0.003935 | 0.015596 | 0.008142 | -0.002592 | -0.031988 | -0.046641 | 135.0 |
# Split into train/test sets (80/20) using only column 2 of the feature
# matrix — the BMI feature — as the single predictor.
bmi_feature = diabetes_X[:, 2]
X_train, X_test, y_train, y_test = train_test_split(
    bmi_feature,
    diabetes_y,
    test_size=0.2,
    random_state=10,
)
# scikit-learn estimators expect a 2-D feature matrix, so add a column axis.
X_train_reshaped = X_train.reshape(-1, 1)
X_train_reshaped
array([[ 0.05954058],
[ 0.08001901],
[-0.02452876],
[ 0.07247433],
[ 0.03367309],
[ 0.05415152],
[-0.03854032],
[-0.00836158],
[-0.06440781],
[ 0.01427248],
[-0.03961813],
[-0.03315126],
[ 0.01858372],
[ 0.01535029],
[-0.00620595],
[-0.046085 ],
[-0.04177375],
[ 0.02828403],
[ 0.08864151],
[-0.01375064],
[-0.06225218],
[ 0.05954058],
[ 0.00888341],
[-0.03315126],
[ 0.03043966],
[-0.06117437],
[-0.0902753 ],
[ 0.01211685],
[-0.06979687],
[-0.00943939],
[ 0.03906215],
[ 0.07462995],
[ 0.06169621],
[ 0.01858372],
[-0.02560657],
[-0.00943939],
[ 0.05954058],
[ 0.01427248],
[-0.03854032],
[-0.03207344],
[-0.046085 ],
[-0.03207344],
[-0.03099563],
[ 0.02612841],
[ 0.06061839],
[ 0.00133873],
[ 0.01750591],
[ 0.0250506 ],
[-0.046085 ],
[ 0.03690653],
[ 0.05415152],
[ 0.08540807],
[ 0.02073935],
[-0.06332999],
[-0.00620595],
[ 0.02612841],
[-0.03422907],
[-0.0547075 ],
[ 0.00457217],
[ 0.00349435],
[-0.00620595],
[ 0.05415152],
[ 0.06816308],
[ 0.01211685],
[ 0.00996123],
[ 0.04445121],
[-0.02560657],
[-0.00297252],
[-0.01267283],
[ 0.04013997],
[ 0.04013997],
[ 0.07786339],
[-0.04500719],
[-0.04177375],
[-0.05686312],
[-0.03530688],
[ 0.06924089],
[-0.06009656],
[ 0.07139652],
[ 0.00133873],
[ 0.02397278],
[ 0.00241654],
[ 0.00672779],
[-0.02452876],
[-0.00297252],
[-0.03099563],
[-0.03207344],
[-0.00943939],
[ 0.02289497],
[ 0.11127556],
[ 0.13714305],
[-0.00512814],
[ 0.05846277],
[-0.00728377],
[-0.00943939],
[-0.01267283],
[-0.0547075 ],
[ 0.00457217],
[-0.03099563],
[-0.08488624],
[-0.01806189],
[-0.01159501],
[ 0.00564998],
[-0.02560657],
[-0.06656343],
[ 0.04552903],
[-0.04069594],
[ 0.03259528],
[ 0.10480869],
[ 0.06708527],
[-0.02991782],
[-0.03315126],
[-0.08919748],
[ 0.01427248],
[-0.01267283],
[ 0.06492964],
[-0.05901875],
[-0.04177375],
[-0.00405033],
[ 0.01966154],
[ 0.00349435],
[-0.02237314],
[-0.02560657],
[-0.07626374],
[-0.01590626],
[ 0.06169621],
[-0.00189471],
[ 0.01427248],
[-0.02452876],
[ 0.00457217],
[-0.06440781],
[ 0.06924089],
[-0.03099563],
[ 0.00672779],
[ 0.08864151],
[ 0.02289497],
[ 0.01535029],
[ 0.07139652],
[-0.06548562],
[-0.04716281],
[ 0.0347509 ],
[-0.02021751],
[-0.02991782],
[-0.02345095],
[-0.02129532],
[-0.02129532],
[-0.00620595],
[-0.02991782],
[-0.00836158],
[ 0.06169621],
[-0.00728377],
[-0.04824063],
[-0.03315126],
[ 0.17055523],
[-0.04931844],
[-0.00728377],
[-0.03638469],
[ 0.01535029],
[-0.04500719],
[-0.01375064],
[ 0.00996123],
[ 0.06061839],
[-0.05578531],
[ 0.04660684],
[ 0.04984027],
[-0.0374625 ],
[ 0.0164281 ],
[-0.03422907],
[-0.0191397 ],
[ 0.04229559],
[-0.07518593],
[-0.01159501],
[ 0.03906215],
[ 0.03043966],
[ 0.00133873],
[-0.0105172 ],
[-0.08057499],
[ 0.03367309],
[-0.02452876],
[ 0.04984027],
[-0.03315126],
[-0.046085 ],
[ 0.00457217],
[ 0.03906215],
[-0.03638469],
[ 0.12528712],
[ 0.00672779],
[-0.07734155],
[-0.0730303 ],
[ 0.04121778],
[ 0.01427248],
[-0.02129532],
[ 0.09403057],
[-0.02345095],
[-0.02668438],
[-0.01159501],
[ 0.01858372],
[ 0.02612841],
[-0.04392938],
[-0.07410811],
[ 0.04445121],
[-0.06548562],
[-0.07087468],
[-0.0816528 ],
[-0.03422907],
[ 0.10480869],
[ 0.05954058],
[-0.05686312],
[-0.01590626],
[-0.06656343],
[-0.046085 ],
[ 0.00133873],
[-0.00081689],
[ 0.00133873],
[ 0.01750591],
[-0.04177375],
[-0.02991782],
[ 0.00564998],
[-0.04069594],
[-0.03530688],
[-0.05794093],
[-0.01482845],
[ 0.00996123],
[-0.00405033],
[ 0.05630715],
[ 0.05415152],
[ 0.05954058],
[ 0.0433734 ],
[-0.00728377],
[ 0.04660684],
[-0.01806189],
[ 0.097264 ],
[-0.05578531],
[-0.02560657],
[-0.06764124],
[ 0.03690653],
[-0.05578531],
[ 0.04984027],
[-0.01590626],
[-0.00836158],
[-0.02021751],
[-0.05039625],
[-0.05794093],
[ 0.0164281 ],
[-0.02560657],
[ 0.04552903],
[ 0.00672779],
[-0.02021751],
[-0.01159501],
[ 0.06061839],
[-0.06225218],
[-0.06656343],
[-0.05578531],
[-0.03530688],
[ 0.00241654],
[-0.01482845],
[-0.00405033],
[-0.06871905],
[-0.0105172 ],
[ 0.00026092],
[-0.0374625 ],
[-0.01806189],
[ 0.07139652],
[-0.03961813],
[ 0.08864151],
[ 0.02073935],
[ 0.01750591],
[ 0.0703187 ],
[ 0.03906215],
[ 0.07139652],
[-0.03207344],
[ 0.0433734 ],
[ 0.00564998],
[ 0.05522933],
[ 0.00888341],
[ 0.02073935],
[ 0.07678558],
[ 0.00564998],
[-0.00620595],
[-0.00189471],
[-0.05794093],
[ 0.09295276],
[-0.02884001],
[-0.03638469],
[-0.05362969],
[ 0.06816308],
[-0.07195249],
[ 0.05630715],
[ 0.05522933],
[-0.02237314],
[ 0.03043966],
[ 0.05630715],
[ 0.12852056],
[ 0.00564998],
[ 0.03367309],
[-0.0105172 ],
[ 0.01858372],
[-0.01590626],
[ 0.03582872],
[ 0.00241654],
[-0.02345095],
[ 0.04768465],
[-0.01698407],
[-0.0277622 ],
[-0.06440781],
[-0.0105172 ],
[-0.05255187],
[-0.02237314],
[ 0.05307371],
[-0.06548562],
[ 0.0519959 ],
[ 0.01427248],
[-0.04824063],
[-0.06548562],
[-0.0547075 ],
[ 0.02181716],
[-0.02021751],
[ 0.04768465],
[ 0.06061839],
[-0.046085 ],
[-0.00189471],
[-0.03638469],
[ 0.0164281 ],
[-0.01590626],
[ 0.02720622],
[ 0.01103904],
[-0.05794093],
[-0.05039625],
[-0.00728377],
[ 0.08109682],
[-0.02452876],
[-0.01159501],
[ 0.0250506 ],
[ 0.00133873],
[ 0.01966154],
[-0.03315126],
[ 0.04229559],
[-0.00943939],
[-0.03638469],
[-0.00836158],
[ 0.16085492],
[-0.02021751],
[ 0.06169621],
[ 0.01966154],
[-0.00081689],
[-0.03854032],
[-0.0105172 ],
[ 0.0347509 ],
[ 0.03798434],
[ 0.03043966],
[-0.01806189],
[-0.00836158],
[-0.02560657]])
# Inspect the training targets (disease-progression values) for the split above.
y_train
array([ 91., 257., 84., 131., 150., 293., 127., 39., 83., 191., 65.,
65., 265., 242., 219., 69., 178., 170., 279., 83., 45., 178.,
174., 214., 202., 99., 94., 235., 158., 59., 220., 275., 242.,
113., 71., 81., 85., 297., 253., 104., 77., 53., 91., 283.,
215., 51., 173., 122., 178., 70., 142., 306., 197., 37., 134.,
52., 190., 42., 48., 73., 283., 192., 121., 144., 235., 141.,
170., 139., 202., 180., 155., 233., 93., 103., 68., 125., 277.,
70., 295., 196., 121., 245., 277., 66., 141., 172., 87., 257.,
265., 336., 233., 116., 136., 248., 60., 160., 143., 200., 102.,
90., 168., 94., 249., 252., 79., 232., 97., 208., 270., 78.,
108., 84., 104., 220., 91., 95., 64., 185., 61., 237., 129.,
49., 199., 134., 104., 281., 109., 142., 200., 191., 170., 220.,
42., 109., 310., 232., 96., 252., 214., 72., 236., 97., 88.,
71., 107., 281., 164., 160., 177., 151., 155., 96., 94., 242.,
128., 182., 102., 201., 90., 216., 210., 186., 63., 99., 144.,
128., 268., 140., 219., 137., 55., 190., 85., 172., 49., 168.,
72., 198., 89., 280., 168., 72., 107., 310., 50., 341., 131.,
116., 57., 198., 90., 131., 263., 71., 152., 181., 248., 196.,
71., 48., 129., 153., 49., 85., 120., 321., 268., 183., 53.,
77., 74., 142., 115., 229., 139., 75., 118., 311., 71., 52.,
162., 97., 150., 68., 310., 187., 263., 195., 95., 174., 77.,
275., 109., 98., 55., 90., 135., 221., 104., 81., 241., 189.,
88., 225., 118., 272., 182., 233., 206., 245., 115., 59., 39.,
53., 123., 124., 198., 72., 61., 259., 113., 101., 55., 78.,
264., 246., 69., 288., 246., 141., 78., 103., 60., 173., 127.,
281., 332., 138., 262., 44., 63., 140., 88., 135., 83., 259.,
77., 303., 68., 156., 222., 272., 259., 67., 270., 179., 273.,
132., 248., 85., 64., 258., 90., 209., 96., 97., 181., 84.,
163., 59., 230., 118., 111., 96., 60., 178., 148., 317., 132.,
47., 185., 200., 212., 151., 225., 111., 158., 87., 52., 180.,
58., 200., 182., 258., 262., 54., 166., 197., 146., 100., 346.,
111., 110., 178., 113., 93., 25., 84., 167., 122., 171., 161.,
74.])
# Fit an ordinary least-squares model on the reshaped BMI column.
# LinearRegression.fit returns the estimator itself, so construction and
# training can be chained into one expression.
model = LinearRegression().fit(X_train_reshaped, y_train)
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
# Produce fitted values for the training set and predictions for the
# held-out test set. The test BMI values also need a column axis before
# being passed to predict.
X_test_reshaped = X_test.reshape(-1, 1)
y_train_pred = model.predict(X_train_reshaped)
y_test_pred = model.predict(X_test_reshaped)
y_train_pred
array([209.39165292, 229.00991657, 128.85351797, 221.78213522,
184.61068832, 204.22895196, 115.43049548, 144.34162084,
90.64953087, 166.02496487, 114.39795528, 120.59319643,
170.15512564, 167.05750506, 146.40670123, 108.20271413,
112.3328749 , 179.44798736, 237.2702381 , 139.17891989,
92.71461126, 209.39165292, 160.86226391, 120.59319643,
181.51306775, 93.74715145, 65.86856627, 163.95988449,
85.48682991, 143.30908065, 189.77338928, 223.84721561,
211.45673331, 170.15512564, 127.82097778, 143.30908065,
209.39165292, 166.02496487, 115.43049548, 121.62573663,
108.20271413, 121.62573663, 122.65827682, 177.38290698,
210.42419311, 153.63448257, 169.12258545, 176.35036679,
108.20271413, 187.7083089 , 204.22895196, 234.17261752,
172.22020602, 91.68207107, 146.40670123, 177.38290698,
119.56065624, 99.9423926 , 156.73210314, 155.69956295,
146.40670123, 204.22895196, 217.65197446, 163.95988449,
161.8948041 , 194.93609024, 127.82097778, 149.5043218 ,
140.21146008, 190.80592947, 190.80592947, 226.94483618,
109.23525432, 112.3328749 , 97.87731222, 118.52811605,
218.68451465, 94.77969164, 220.74959503, 153.63448257,
175.3178266 , 154.66702276, 158.79718353, 128.85351797,
149.5043218 , 122.65827682, 121.62573663, 143.30908065,
174.2852864 , 258.95358213, 283.73454673, 147.43924142,
208.35911273, 145.37416104, 143.30908065, 140.21146008,
99.9423926 , 156.73210314, 122.65827682, 71.03126723,
135.04875912, 141.24400027, 157.76464334, 127.82097778,
88.58445049, 195.96863043, 113.36541509, 183.57814813,
252.75834098, 216.61943427, 123.69081701, 120.59319643,
66.90110646, 166.02496487, 140.21146008, 214.55435388,
95.81223183, 112.3328749 , 148.47178161, 171.18766583,
155.69956295, 130.91859835, 127.82097778, 79.29158876,
137.1138395 , 211.45673331, 150.53686199, 166.02496487,
128.85351797, 156.73210314, 90.64953087, 218.68451465,
122.65827682, 158.79718353, 237.2702381 , 174.2852864 ,
167.05750506, 220.74959503, 89.61699068, 107.17017394,
185.64322851, 132.98367873, 123.69081701, 129.88605816,
131.95113854, 131.95113854, 146.40670123, 123.69081701,
144.34162084, 211.45673331, 145.37416104, 106.13763375,
120.59319643, 315.74329267, 105.10509356, 145.37416104,
117.49557586, 167.05750506, 109.23525432, 139.17891989,
161.8948041 , 210.42419311, 98.90985241, 197.00117062,
200.0987912 , 116.46303567, 168.09004525, 119.56065624,
134.01621893, 192.87100986, 80.32412896, 141.24400027,
189.77338928, 181.51306775, 153.63448257, 142.27654046,
75.161428 , 184.61068832, 128.85351797, 200.0987912 ,
120.59319643, 108.20271413, 156.73210314, 189.77338928,
117.49557586, 272.37660462, 158.79718353, 78.25904857,
82.38920934, 191.83846966, 166.02496487, 131.95113854,
242.43293906, 129.88605816, 126.78843758, 141.24400027,
170.15512564, 177.38290698, 110.26779452, 81.35666915,
194.93609024, 89.61699068, 84.45428972, 74.12888781,
119.56065624, 252.75834098, 209.39165292, 97.87731222,
137.1138395 , 88.58445049, 108.20271413, 153.63448257,
151.56940219, 153.63448257, 169.12258545, 112.3328749 ,
123.69081701, 157.76464334, 113.36541509, 118.52811605,
96.84477202, 138.14637969, 161.8948041 , 148.47178161,
206.29403235, 204.22895196, 209.39165292, 193.90355005,
145.37416104, 197.00117062, 135.04875912, 245.53055963,
98.90985241, 127.82097778, 87.5519103 , 187.7083089 ,
98.90985241, 200.0987912 , 137.1138395 , 144.34162084,
132.98367873, 104.07255337, 96.84477202, 168.09004525,
127.82097778, 195.96863043, 158.79718353, 132.98367873,
141.24400027, 210.42419311, 92.71461126, 88.58445049,
98.90985241, 118.52811605, 154.66702276, 138.14637969,
148.47178161, 86.51937011, 142.27654046, 152.60194238,
116.46303567, 135.04875912, 220.74959503, 114.39795528,
237.2702381 , 172.22020602, 169.12258545, 219.71705484,
189.77338928, 220.74959503, 121.62573663, 193.90355005,
157.76464334, 205.26149216, 160.86226391, 172.22020602,
225.91229599, 157.76464334, 146.40670123, 150.53686199,
96.84477202, 241.40039887, 124.7233572 , 117.49557586,
100.97493279, 217.65197446, 83.42174953, 206.29403235,
205.26149216, 130.91859835, 181.51306775, 206.29403235,
275.47422519, 157.76464334, 184.61068832, 142.27654046,
170.15512564, 137.1138395 , 186.6757687 , 154.66702276,
129.88605816, 198.03371081, 136.08129931, 125.75589739,
90.64953087, 142.27654046, 102.00747298, 130.91859835,
203.19641177, 89.61699068, 202.16387158, 166.02496487,
106.13763375, 89.61699068, 99.9423926 , 173.25274621,
132.98367873, 198.03371081, 210.42419311, 108.20271413,
150.53686199, 117.49557586, 168.09004525, 137.1138395 ,
178.41544717, 162.92734429, 96.84477202, 104.07255337,
145.37416104, 230.04245676, 128.85351797, 141.24400027,
176.35036679, 153.63448257, 171.18766583, 120.59319643,
192.87100986, 143.30908065, 117.49557586, 144.34162084,
306.45043095, 132.98367873, 211.45673331, 171.18766583,
151.56940219, 115.43049548, 142.27654046, 185.64322851,
188.74084909, 181.51306775, 135.04875912, 144.34162084,
127.82097778])
# Inspect the model's predictions on the held-out test set.
y_test_pred
array([107.17017394, 195.96863043, 158.79718353, 136.08129931,
161.8948041 , 179.44798736, 128.85351797, 274.441685 ,
113.36541509, 129.88605816, 129.88605816, 148.47178161,
186.6757687 , 76.19396819, 132.98367873, 262.0512027 ,
148.47178161, 113.36541509, 214.55435388, 93.74715145,
128.85351797, 244.49801944, 107.17017394, 208.35911273,
85.48682991, 122.65827682, 144.34162084, 114.39795528,
169.12258545, 116.46303567, 246.56309983, 241.40039887,
94.77969164, 118.52811605, 182.54560794, 156.73210314,
103.04001317, 195.96863043, 270.31152424, 140.21146008,
151.56940219, 134.01621893, 222.81467542, 147.43924142,
91.68207107, 144.34162084, 92.71461126, 74.12888781,
257.92104193, 115.43049548, 123.69081701, 95.81223183,
82.38920934, 241.40039887, 210.42419311, 122.65827682,
129.88605816, 147.43924142, 175.3178266 , 150.53686199,
149.5043218 , 109.23525432, 122.65827682, 72.06380742,
78.25904857, 234.17261752, 190.80592947, 111.30033471,
96.84477202, 156.73210314, 220.74959503, 147.43924142,
122.65827682, 85.48682991, 213.52181369, 202.16387158,
163.95988449, 115.43049548, 113.36541509, 128.85351797,
181.51306775, 130.91859835, 191.83846966, 179.44798736,
162.92734429, 135.04875912, 114.39795528, 108.20271413,
124.7233572 ])
# Quantify the fit: mean squared error on the training and test splits.
train_loss = metrics.mean_squared_error(y_true=y_train, y_pred=y_train_pred)
test_loss = metrics.mean_squared_error(y_true=y_test, y_pred=y_test_pred)
Plotting the Graph using Matplotlib
# Plot train/test points and the fitted regression line with matplotlib.
# The test BMI values are unsorted, so sort them before drawing the line;
# otherwise plt.plot connects the points in sample order and the "line"
# zigzags back and forth across the x axis.
order = X_test.argsort()
plt.scatter(X_train, y_train, color='gold', label='Training Data')
plt.scatter(X_test, y_test, color='purple', label='Test Data')
plt.plot(X_test[order], y_test_pred[order], color='cyan', label='Linear Regression Model')
plt.xlabel('Body Mass Index (BMI)')
plt.ylabel('Disease Progression')
plt.legend()
plt.show()
Plotting the Graph using Plotly
# Recreate the same comparison interactively with plotly.
# Sort the test samples by BMI before drawing the 'lines' trace; with the
# unsorted order, the line jumps between x positions instead of being drawn
# left-to-right along the axis.
order = X_test.argsort()
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_train, y=y_train, mode='markers', name='Training Data'))
fig.add_trace(go.Scatter(x=X_test, y=y_test, mode='markers', name='Test Data'))
fig.add_trace(go.Scatter(x=X_test[order], y=y_test_pred[order], mode='lines', name='Linear Regression Model'))
fig.update_layout(title='Linear Regression - BMI vs Quantitative Measure Disease Progression',
                  xaxis_title='Body Mass Index (BMI)',
                  yaxis_title='Disease Progression')
fig.show()
Plotting the Graph using Seaborn
# Draw the same train/test comparison a third time, using seaborn.
sns.scatterplot(x=X_train, y=y_train, label='Training Data')
sns.scatterplot(x=X_test, y=y_test, color='orange', label='Test Data')
sns.lineplot(x=X_test, y=y_test_pred, color='green', label='Linear Regression Model')
# Axis labels, title and legend are added via the underlying matplotlib axes.
plt.xlabel('Body Mass Index (BMI)')
plt.ylabel('Disease Progression')
plt.title('Linear Regression - BMI vs Quantitative Measure Disease Progression')
plt.legend()
plt.show()
# Report the learned line (slope on the BMI feature, intercept) and the
# mean squared error on each split. f-strings replace the dated %-formatting;
# the printed text is byte-identical to the original output.
print("Model Parameters\n")
print(f"Slope (Coefficient):{model.coef_[0]:.2f}")
print(f"Intercept:{model.intercept_:.2f}")
print(f"Loss on Training Data (Mean Squared Value):{train_loss:.2f}")
print(f"Loss on Test Data (Mean Squared Value): {test_loss:.2f}")
Model Parameters

Slope (Coefficient):958.00
Intercept:152.35
Loss on Training Data (Mean Squared Value):3879.72
Loss on Test Data (Mean Squared Value): 3934.09
Conclusion: